Load Libraries and set working directory

### 
libs<-c('scater','loomR','Seurat','patchwork','SeuratDisk','monocle3','ggpubr','cowplot','ggplot2','ggbeeswarm','clusterProfiler',
        'monocle3','limma','edgeR','fitdistrplus','factoextra','ggrepel','tidyverse','pheatmap','reshape2','ComplexHeatmap',
        "survminer","survival","ggalluvial",'gtsummary','variancePartition','DirichletReg','cowsay','emmeans','forestmodel')
lapply(libs, require, character.only = TRUE) ; rm(libs)
### 
setwd("/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/")

Load data and/or tables

file_717_per_cell_md <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/1110_per_cell_md.rds')
load(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/clinical_metadata_IA22_Feb03_release.RData')

Data cleaning and preparation

### Mark doublets in a binary variable
file_717_per_cell_md$doublet_pred_edit <- file_717_per_cell_md$doublet_pred
file_717_per_cell_md$doublet_pred_edit[file_717_per_cell_md$doublet_pred_edit %in% c('Plasma_B','poss_singlet_cluster','singlet')] <- 'singlet'
cmia22 <- clinical_metadata_IA22_Feb03_release[which(clinical_metadata_IA22_Feb03_release$public_id %in% file_717_per_cell_md$public_id),]
cmia22 <- cmia22[,colnames(cmia22) %in% c('censpfs','ttcpfs','censos','ttcos', 'collection_event','sample_id','d_pt_sex','public_id')]
cmia22 <- unique(cmia22)
###
my_vars <- c('public_id','davies_based_risk','visit_type','siteXbatch','progression_group','d_dx_amm_age','d_dx_amm_iss_stage','collection_event','sample_id')
###
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% my_vars)]
rownames(x) <- NULL ; x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
x$siteXbatch <- NULL
meta_df <- x
meta_df <- merge(meta_df,cmia22,by='public_id',all=TRUE) ; rm(cmia22)

Curent Survival and Progression profiles

### subset
ix <- which(meta_df$davies_based_risk %in% c('standard_risk','high_risk') & meta_df$collection_event %in% 'Baseline')
tmp_df <- meta_df[ix,]
### PFS SURV
tmp_df_pfs <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ davies_based_risk, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")

### PFS COX 
x <- coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                 HR = x$table_body$estimate , Marker = 'PFS (ttcpfs, censpfs)' , label=x$table_body$label)  

### SURV OS
tmp_df_os <- surv_pvalue( survfit(Surv(ttcos, censos==1) ~ davies_based_risk, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")

### COX OS
x <- coxph(Surv(ttcos, censos==1) ~ davies_based_risk, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                 HR = x$table_body$estimate , Marker = 'OS (ttcos, censos)' , label=x$table_body$label)  

Survival

ggsurvplot( fit = survfit(Surv(ttcos, censos==1) ~ davies_based_risk, data = tmp_df), 
            type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
            xlab = "Days", ylab = "OS Prob.",pval = TRUE,risk.table = TRUE,palette = c('orange','skyblue3'))

Sentivity to covariates

summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk, 
##     data = tmp_df)
## 
##   n= 230, number of events= 83 
## 
##                                   coef exp(coef) se(coef)      z Pr(>|z|)   
## davies_based_riskstandard_risk -0.6839    0.5046   0.2305 -2.967  0.00301 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.5046      1.982    0.3212    0.7928
## 
## Concordance= 0.579  (se = 0.028 )
## Likelihood ratio test= 9.26  on 1 df,   p=0.002
## Wald test            = 8.8  on 1 df,   p=0.003
## Score (logrank) test = 9.15  on 1 df,   p=0.002
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_dx_amm_age, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_dx_amm_age, data = tmp_df)
## 
##   n= 230, number of events= 83 
## 
##                                    coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.43817   0.64522  0.23677 -1.851   0.0642 .  
## d_dx_amm_age                    0.05341   1.05486  0.01104  4.840  1.3e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.6452      1.550    0.4057     1.026
## d_dx_amm_age                      1.0549      0.948    1.0323     1.078
## 
## Concordance= 0.674  (se = 0.029 )
## Likelihood ratio test= 33.13  on 2 df,   p=6e-08
## Wald test            = 32.69  on 2 df,   p=8e-08
## Score (logrank) test = 32.22  on 2 df,   p=1e-07
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_pt_sex, data = tmp_df)
## 
##   n= 230, number of events= 83 
## 
##                                   coef exp(coef) se(coef)      z Pr(>|z|)   
## davies_based_riskstandard_risk -0.6984    0.4974   0.2308 -3.026  0.00248 **
## d_pt_sexmale                    0.2806    1.3239   0.2347  1.196  0.23185   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.4974     2.0105    0.3164    0.7819
## d_pt_sexmale                      1.3239     0.7554    0.8358    2.0969
## 
## Concordance= 0.598  (se = 0.031 )
## Likelihood ratio test= 10.73  on 2 df,   p=0.005
## Wald test            = 10.25  on 2 df,   p=0.006
## Score (logrank) test = 10.6  on 2 df,   p=0.005
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_dx_amm_iss_stage, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_dx_amm_iss_stage, data = tmp_df)
## 
##   n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
## 
##                                   coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.5752    0.5626   0.2380 -2.417   0.0156 *  
## d_dx_amm_iss_stage              0.7338    2.0829   0.1572  4.669 3.02e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.5626     1.7776    0.3529    0.8969
## d_dx_amm_iss_stage                2.0829     0.4801    1.5308    2.8343
## 
## Concordance= 0.682  (se = 0.028 )
## Likelihood ratio test= 32.62  on 2 df,   p=8e-08
## Wald test            = 30.39  on 2 df,   p=3e-07
## Score (logrank) test = 32.51  on 2 df,   p=9e-08
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age, data = tmp_df)
## 
##   n= 230, number of events= 83 
## 
##                                    coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.45043   0.63735  0.23681 -1.902   0.0572 .  
## d_pt_sexmale                    0.37352   1.45283  0.23571  1.585   0.1130    
## d_dx_amm_age                    0.05493   1.05646  0.01109  4.954 7.29e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.6374     1.5690    0.4007     1.014
## d_pt_sexmale                      1.4528     0.6883    0.9153     2.306
## d_dx_amm_age                      1.0565     0.9466    1.0338     1.080
## 
## Concordance= 0.675  (se = 0.03 )
## Likelihood ratio test= 35.74  on 3 df,   p=9e-08
## Wald test            = 35.16  on 3 df,   p=1e-07
## Score (logrank) test = 34.84  on 3 df,   p=1e-07
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage, data = tmp_df)
## 
##   n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
## 
##                                    coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.46832   0.62605  0.23885 -1.961   0.0499 *  
## d_pt_sexmale                    0.38137   1.46429  0.24007  1.589   0.1122    
## d_dx_amm_age                    0.05047   1.05177  0.01139  4.430 9.44e-06 ***
## d_dx_amm_iss_stage              0.63491   1.88685  0.15666  4.053 5.06e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.6261     1.5973    0.3920    0.9998
## d_pt_sexmale                      1.4643     0.6829    0.9147    2.3441
## d_dx_amm_age                      1.0518     0.9508    1.0285    1.0755
## d_dx_amm_iss_stage                1.8869     0.5300    1.3880    2.5650
## 
## Concordance= 0.724  (se = 0.027 )
## Likelihood ratio test= 54.82  on 4 df,   p=4e-11
## Wald test            = 51.44  on 4 df,   p=2e-10
## Score (logrank) test = 54.61  on 4 df,   p=4e-11
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch, data = tmp_df)
## 
##   n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
## 
##                                    coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.65638   0.51873  0.38826 -1.691   0.0909 .  
## d_pt_sexmale                    0.35906   1.43198  0.24095  1.490   0.1362    
## d_dx_amm_age                    0.05157   1.05292  0.01164  4.431 9.39e-06 ***
## d_dx_amm_iss_stage              0.62311   1.86473  0.15721  3.963 7.39e-05 ***
## batchB2                         0.17863   1.19558  0.47876  0.373   0.7091    
## batchB3                         0.33893   1.40345  0.41764  0.812   0.4171    
## batchB4                         0.41860   1.51983  0.36090  1.160   0.2461    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.5187     1.9278    0.2424     1.110
## d_pt_sexmale                      1.4320     0.6983    0.8930     2.296
## d_dx_amm_age                      1.0529     0.9497    1.0292     1.077
## d_dx_amm_iss_stage                1.8647     0.5363    1.3702     2.538
## batchB2                           1.1956     0.8364    0.4678     3.056
## batchB3                           1.4034     0.7125    0.6190     3.182
## batchB4                           1.5198     0.6580    0.7492     3.083
## 
## Concordance= 0.73  (se = 0.027 )
## Likelihood ratio test= 56.36  on 7 df,   p=8e-10
## Wald test            = 53.42  on 7 df,   p=3e-09
## Score (logrank) test = 57.62  on 7 df,   p=4e-10
summary(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, 
##     data = tmp_df)
## 
##   n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
## 
##                                    coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.60505   0.54605  0.39396 -1.536   0.1246    
## d_pt_sexmale                    0.41035   1.50735  0.24845  1.652   0.0986 .  
## d_dx_amm_age                    0.05198   1.05335  0.01184  4.389 1.14e-05 ***
## d_dx_amm_iss_stage              0.66516   1.94481  0.16126  4.125 3.71e-05 ***
## batchB2                         0.11673   1.12381  0.48801  0.239   0.8110    
## batchB3                         0.28188   1.32561  0.41555  0.678   0.4976    
## batchB4                         0.10450   1.11015  0.42877  0.244   0.8075    
## siteMAYO                       -0.26594   0.76649  0.37022 -0.718   0.4726    
## siteMSSM                       -0.21751   0.80452  0.37414 -0.581   0.5610    
## siteWUSTL                       0.19541   1.21581  0.36393  0.537   0.5913    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.5460     1.8313    0.2523     1.182
## d_pt_sexmale                      1.5073     0.6634    0.9263     2.453
## d_dx_amm_age                      1.0534     0.9493    1.0292     1.078
## d_dx_amm_iss_stage                1.9448     0.5142    1.4178     2.668
## batchB2                           1.1238     0.8898    0.4318     2.925
## batchB3                           1.3256     0.7544    0.5871     2.993
## batchB4                           1.1102     0.9008    0.4791     2.572
## siteMAYO                          0.7665     1.3047    0.3710     1.584
## siteMSSM                          0.8045     1.2430    0.3864     1.675
## siteWUSTL                         1.2158     0.8225    0.5958     2.481
## 
## Concordance= 0.735  (se = 0.028 )
## Likelihood ratio test= 58.68  on 10 df,   p=6e-09
## Wald test            = 54.72  on 10 df,   p=4e-08
## Score (logrank) test = 59.01  on 10 df,   p=6e-09
coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
davies_based_risk


    high_risk — —
    standard_risk 0.55 0.25, 1.18 0.12
d_pt_sex


    female — —
    male 1.51 0.93, 2.45 0.10
d_dx_amm_age 1.05 1.03, 1.08 <0.001
d_dx_amm_iss_stage 1.94 1.42, 2.67 <0.001
batch


    B1 — —
    B2 1.12 0.43, 2.92 0.8
    B3 1.33 0.59, 2.99 0.5
    B4 1.11 0.48, 2.57 0.8
site


    EMORY — —
    MAYO 0.77 0.37, 1.58 0.5
    MSSM 0.80 0.39, 1.67 0.6
    WUSTL 1.22 0.60, 2.48 0.6
1 HR = Hazard Ratio, CI = Confidence Interval
forest_model(coxph(Surv(ttcos, censos==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, data = tmp_df))
## Warning in recalculate_width_panels(panel_positions, mapped_text = mapped_text,
## : Unable to resize forest panel to be smaller than its heading; consider a
## smaller text size

Progression

ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ davies_based_risk, data = tmp_df), 
            type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
            xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('orange','skyblue3'))

summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk, 
##     data = tmp_df)
## 
##   n= 230, number of events= 143 
## 
##                                   coef exp(coef) se(coef)      z Pr(>|z|)   
## davies_based_riskstandard_risk -0.4448    0.6409   0.1696 -2.623   0.0087 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.6409       1.56    0.4597    0.8936
## 
## Concordance= 0.567  (se = 0.022 )
## Likelihood ratio test= 6.98  on 1 df,   p=0.008
## Wald test            = 6.88  on 1 df,   p=0.009
## Score (logrank) test = 6.99  on 1 df,   p=0.008
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_dx_amm_age, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_dx_amm_age, data = tmp_df)
## 
##   n= 230, number of events= 143 
## 
##                                     coef exp(coef)  se(coef)      z Pr(>|z|)
## davies_based_riskstandard_risk -0.306911  0.735716  0.173466 -1.769  0.07685
## d_dx_amm_age                    0.032452  1.032984  0.008754  3.707  0.00021
##                                   
## davies_based_riskstandard_risk .  
## d_dx_amm_age                   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.7357     1.3592    0.5237     1.034
## d_dx_amm_age                      1.0330     0.9681    1.0154     1.051
## 
## Concordance= 0.619  (se = 0.025 )
## Likelihood ratio test= 20.95  on 2 df,   p=3e-05
## Wald test            = 20.57  on 2 df,   p=3e-05
## Score (logrank) test = 20.45  on 2 df,   p=4e-05
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_pt_sex, data = tmp_df)
## 
##   n= 230, number of events= 143 
## 
##                                    coef exp(coef) se(coef)      z Pr(>|z|)   
## davies_based_riskstandard_risk -0.44701   0.63954  0.17026 -2.625  0.00865 **
## d_pt_sexmale                    0.02462   1.02492  0.17497  0.141  0.88811   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.6395     1.5636    0.4581    0.8929
## d_pt_sexmale                      1.0249     0.9757    0.7274    1.4442
## 
## Concordance= 0.571  (se = 0.025 )
## Likelihood ratio test= 7  on 2 df,   p=0.03
## Wald test            = 6.9  on 2 df,   p=0.03
## Score (logrank) test = 7.01  on 2 df,   p=0.03
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_dx_amm_iss_stage, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_dx_amm_iss_stage, data = tmp_df)
## 
##   n= 220, number of events= 136 
##    (10 observations deleted due to missingness)
## 
##                                   coef exp(coef) se(coef)      z Pr(>|z|)    
## davies_based_riskstandard_risk -0.3460    0.7075   0.1763 -1.962   0.0497 *  
## d_dx_amm_iss_stage              0.4619    1.5871   0.1169  3.950 7.83e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.7075     1.4134    0.5008    0.9996
## d_dx_amm_iss_stage                1.5871     0.6301    1.2620    1.9959
## 
## Concordance= 0.623  (se = 0.024 )
## Likelihood ratio test= 22.76  on 2 df,   p=1e-05
## Wald test            = 22.39  on 2 df,   p=1e-05
## Score (logrank) test = 23.05  on 2 df,   p=1e-05
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age, data = tmp_df)
## 
##   n= 230, number of events= 143 
## 
##                                     coef exp(coef)  se(coef)      z Pr(>|z|)
## davies_based_riskstandard_risk -0.310562  0.733035  0.174043 -1.784 0.074359
## d_pt_sexmale                    0.044009  1.044992  0.175216  0.251 0.801681
## d_dx_amm_age                    0.032549  1.033084  0.008772  3.710 0.000207
##                                   
## davies_based_riskstandard_risk .  
## d_pt_sexmale                      
## d_dx_amm_age                   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk     0.733     1.3642    0.5212     1.031
## d_pt_sexmale                       1.045     0.9569    0.7413     1.473
## d_dx_amm_age                       1.033     0.9680    1.0155     1.051
## 
## Concordance= 0.62  (se = 0.025 )
## Likelihood ratio test= 21.01  on 3 df,   p=1e-04
## Wald test            = 20.58  on 3 df,   p=1e-04
## Score (logrank) test = 20.5  on 3 df,   p=1e-04
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage, data = tmp_df)
## 
##   n= 220, number of events= 136 
##    (10 observations deleted due to missingness)
## 
##                                     coef exp(coef)  se(coef)      z Pr(>|z|)
## davies_based_riskstandard_risk -0.272531  0.761450  0.177851 -1.532 0.125435
## d_pt_sexmale                    0.008101  1.008134  0.179756  0.045 0.964056
## d_dx_amm_age                    0.027368  1.027746  0.008952  3.057 0.002233
## d_dx_amm_iss_stage              0.411160  1.508567  0.118537  3.469 0.000523
##                                   
## davies_based_riskstandard_risk    
## d_pt_sexmale                      
## d_dx_amm_age                   ** 
## d_dx_amm_iss_stage             ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    0.7614     1.3133    0.5373     1.079
## d_pt_sexmale                      1.0081     0.9919    0.7088     1.434
## d_dx_amm_age                      1.0277     0.9730    1.0099     1.046
## d_dx_amm_iss_stage                1.5086     0.6629    1.1958     1.903
## 
## Concordance= 0.645  (se = 0.024 )
## Likelihood ratio test= 32.36  on 4 df,   p=2e-06
## Wald test            = 31.53  on 4 df,   p=2e-06
## Score (logrank) test = 32.31  on 4 df,   p=2e-06
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch, data = tmp_df)
## 
##   n= 220, number of events= 136 
##    (10 observations deleted due to missingness)
## 
##                                     coef exp(coef)  se(coef)      z Pr(>|z|)
## davies_based_riskstandard_risk  0.038678  1.039436  0.321029  0.120 0.904101
## d_pt_sexmale                    0.036327  1.036995  0.181236  0.200 0.841138
## d_dx_amm_age                    0.027896  1.028288  0.008954  3.115 0.001837
## d_dx_amm_iss_stage              0.419301  1.520898  0.119930  3.496 0.000472
## batchB2                        -0.647691  0.523252  0.386927 -1.674 0.094143
## batchB3                         0.035576  1.036217  0.337998  0.105 0.916172
## batchB4                        -0.332520  0.717114  0.330716 -1.005 0.314678
##                                   
## davies_based_riskstandard_risk    
## d_pt_sexmale                      
## d_dx_amm_age                   ** 
## d_dx_amm_iss_stage             ***
## batchB2                        .  
## batchB3                           
## batchB4                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    1.0394     0.9621    0.5540     1.950
## d_pt_sexmale                      1.0370     0.9643    0.7270     1.479
## d_dx_amm_age                      1.0283     0.9725    1.0104     1.046
## d_dx_amm_iss_stage                1.5209     0.6575    1.2023     1.924
## batchB2                           0.5233     1.9111    0.2451     1.117
## batchB3                           1.0362     0.9650    0.5343     2.010
## batchB4                           0.7171     1.3945    0.3750     1.371
## 
## Concordance= 0.649  (se = 0.025 )
## Likelihood ratio test= 39.18  on 7 df,   p=2e-06
## Wald test            = 38.07  on 7 df,   p=3e-06
## Score (logrank) test = 39.22  on 7 df,   p=2e-06
summary(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, data = tmp_df))
## Call:
## coxph(formula = Surv(ttcpfs, censpfs == 1) ~ davies_based_risk + 
##     d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, 
##     data = tmp_df)
## 
##   n= 220, number of events= 136 
##    (10 observations deleted due to missingness)
## 
##                                      coef  exp(coef)   se(coef)      z Pr(>|z|)
## davies_based_riskstandard_risk  0.0581761  1.0599016  0.3267039  0.178 0.858668
## d_pt_sexmale                    0.0875519  1.0914989  0.1881030  0.465 0.641612
## d_dx_amm_age                    0.0275172  1.0278993  0.0089888  3.061 0.002204
## d_dx_amm_iss_stage              0.4443178  1.5594260  0.1225004  3.627 0.000287
## batchB2                        -0.6605975  0.5165426  0.3910928 -1.689 0.091199
## batchB3                         0.0003138  1.0003139  0.3395023  0.001 0.999262
## batchB4                        -0.4766479  0.6208611  0.3765160 -1.266 0.205533
## siteMAYO                       -0.2633926  0.7684401  0.2874548 -0.916 0.359514
## siteMSSM                       -0.0863609  0.9172631  0.2729200 -0.316 0.751674
## siteWUSTL                       0.0380941  1.0388290  0.2761889  0.138 0.890298
##                                   
## davies_based_riskstandard_risk    
## d_pt_sexmale                      
## d_dx_amm_age                   ** 
## d_dx_amm_iss_stage             ***
## batchB2                        .  
## batchB3                           
## batchB4                           
## siteMAYO                          
## siteMSSM                          
## siteWUSTL                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                exp(coef) exp(-coef) lower .95 upper .95
## davies_based_riskstandard_risk    1.0599     0.9435    0.5587     2.011
## d_pt_sexmale                      1.0915     0.9162    0.7549     1.578
## d_dx_amm_age                      1.0279     0.9729    1.0099     1.046
## d_dx_amm_iss_stage                1.5594     0.6413    1.2266     1.983
## batchB2                           0.5165     1.9359    0.2400     1.112
## batchB3                           1.0003     0.9997    0.5142     1.946
## batchB4                           0.6209     1.6107    0.2968     1.299
## siteMAYO                          0.7684     1.3013    0.4374     1.350
## siteMSSM                          0.9173     1.0902    0.5373     1.566
## siteWUSTL                         1.0388     0.9626    0.6046     1.785
## 
## Concordance= 0.654  (se = 0.024 )
## Likelihood ratio test= 40.66  on 10 df,   p=1e-05
## Wald test            = 38.98  on 10 df,   p=3e-05
## Score (logrank) test = 40.25  on 10 df,   p=2e-05
coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
davies_based_risk


    high_risk — —
    standard_risk 1.06 0.56, 2.01 0.9
d_pt_sex


    female — —
    male 1.09 0.75, 1.58 0.6
d_dx_amm_age 1.03 1.01, 1.05 0.002
d_dx_amm_iss_stage 1.56 1.23, 1.98 <0.001
batch


    B1 — —
    B2 0.52 0.24, 1.11 0.091
    B3 1.00 0.51, 1.95 >0.9
    B4 0.62 0.30, 1.30 0.2
site


    EMORY — —
    MAYO 0.77 0.44, 1.35 0.4
    MSSM 0.92 0.54, 1.57 0.8
    WUSTL 1.04 0.60, 1.78 0.9
1 HR = Hazard Ratio, CI = Confidence Interval
forest_model(coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + batch + site, data = tmp_df))
## Warning in recalculate_width_panels(panel_positions, mapped_text = mapped_text,
## : Unable to resize forest panel to be smaller than its heading; consider a
## smaller text size

Prepare single cell populations data (subclusters)

rm(x,y,tmp_df)
###
ix <- which(meta_df$davies_based_risk %in% c('standard_risk','high_risk') & meta_df$collection_event %in% 'Baseline')
meta_baseline_df <- meta_df[ix,]

### All populations
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'

my_doublets <- unique(as.character(file_717_per_cell_md$subcluster_V03072023[file_717_per_cell_md$doublet_pred %in% c('dblet_cluster','poss_dblet_cluster')]))

y <- y[!rownames(y) %in% my_doublets,]

### remove non baseline/relevant
y <- y[,which(colnames(y) %in% meta_baseline_df$sample_id)]

cell_props_all = DR_data(t(y))
identical(meta_baseline_df$sample_id,rownames(cell_props_all))
## [1] FALSE
cell_props_all <- cell_props_all[match(meta_baseline_df$sample_id,rownames(cell_props_all)),]
identical(meta_baseline_df$sample_id,rownames(cell_props_all))
## [1] TRUE

Prepare single cell populations data (Subclusters per compartment)

### All populations
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'

my_doublets <- unique(as.character(file_717_per_cell_md$subcluster_V03072023[file_717_per_cell_md$doublet_pred %in% c('dblet_cluster','poss_dblet_cluster')]))

y <- y[!rownames(y) %in% my_doublets,]

y <- y[,which(colnames(y) %in% meta_baseline_df$sample_id)]

Compartment <- c('NkT','BEry','Ery','Myeloid','Plasma') #,'Full'
cell_proportions <- list()
for(iii in Compartment){
  ix <- grep(paste('^',iii,sep=''), rownames(y))
  z <- y[ix,]
  cell_proportions[[iii]] = DR_data(t(z))
}
cell_props_comp <- do.call(cbind,cell_proportions)
identical(meta_baseline_df$sample_id,rownames(cell_props_comp))
## [1] FALSE
cell_props_comp <- cell_props_comp[match(meta_baseline_df$sample_id,rownames(cell_props_comp)),]
identical(meta_baseline_df$sample_id,rownames(cell_props_comp))
## [1] TRUE

Prepare Compartments

### All populations
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
y <- y[!rownames(y) %in% my_doublets,]

z <- list()
for(iii in Compartment){
  ix <- grep(paste('^',iii,sep=''), rownames(y))
  z[[iii]] <- colSums(y[ix,])
}    
z <- do.call(rbind,z)
cell_compartment = DR_data(t(z))
### remove non baseline/relevant
cell_compartment <- cell_compartment[which(rownames(cell_compartment) %in% meta_baseline_df$sample_id),]
identical(meta_baseline_df$sample_id,rownames(cell_compartment))
## [1] FALSE
cell_compartment <- cell_compartment[match(meta_baseline_df$sample_id,rownames(cell_compartment)),]
identical(meta_baseline_df$sample_id,rownames(cell_compartment))
## [1] TRUE

Stats for all populations ‘Compartment’ (univariate)

comp_fulldf <- cbind(meta_baseline_df,cell_compartment)
### Make data storage objects
tmp_list_os <- list() ; tmp_os_cox_list <- list()
tmp_list_pfs <- list() ; tmp_pfs_cox_list <- list()
###
count=1 
###
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_risk_per_compartment_summary.pdf',width = 6,height = 6)
for(i in 1:length(Compartment)){
    tmp_df <- comp_fulldf
    tmp_df$y <- tmp_df[,paste(Compartment[i])]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25))] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75))] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25)) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5))] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75)) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5))] <- 'Q3'
    ### PFS Plot
    print(
      ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df), 
                  type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "1",pval.method = TRUE,
                  xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('deeppink1','deeppink4','skyblue4','skyblue1')) +
        labs(title = Compartment[i])
    )
    ### PFS SURV
    tmp_df_pfs <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
    tmp_df_pfs$marker <- paste(Compartment[i])
    tmp_list_pfs[[count]] <- tmp_df_pfs
    ### PFS COX 
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label)  
    tmp_pfs_cox_list[[count]] <- x
    ### OS Plot
    print(
      ggsurvplot( fit = survfit(Surv(ttcos, censos==1) ~ x, data = tmp_df), 
                  type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "1",pval.method = TRUE,
                  xlab = "Days", ylab = "OS Prob.",pval = TRUE,risk.table = TRUE,palette = c('deeppink1','deeppink4','skyblue4','skyblue1')) +
        labs(title = Compartment[i])
    )
    ### SURV OS
    tmp_df_os <- surv_pvalue( survfit(Surv(ttcos, censos==1) ~ x, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
    tmp_df_os$marker <- paste(Compartment[i])
    tmp_list_os[[count]] <- tmp_df_os
    ### COX OS
    x <- coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label)  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
}
dev.off()
## quartz_off_screen 
##                 2
### +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Summary results ‘Compartment’ PFS

### Prepare tables PFS
tmp_list_pfs <- do.call(rbind,tmp_list_pfs) ; tmp_pfs_cox_list <- do.call(rbind,tmp_pfs_cox_list) 
rownames(tmp_pfs_cox_list) <- NULL
tmp_list_pfs$pval <- as.numeric(gsub("p = ","",tmp_list_pfs$pval.txt))
tmp_list_pfs$pval.txt <- NULL
###
colnames(tmp_pfs_cox_list) <- paste('cox',colnames(tmp_pfs_cox_list),sep='_')
colnames(tmp_pfs_cox_list)[5] <- 'Marker'
colnames(tmp_list_pfs)[4] <- 'Marker'
### pfs
results_cox_pfs_fit_df <- merge(tmp_pfs_cox_list,tmp_list_pfs,by='Marker')
### pval match
results_cox_pfs_fit_df$pval_match <- NA
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$cox_pval<0.05] <- 'Cox'
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$pval<0.05] <- 'LogR'
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$pval<0.05 & results_cox_pfs_fit_df$cox_pval<0.05] <- 'Cox+LogR'
results_cox_pfs_fit_df$sig_cox <- results_cox_pfs_fit_df$cox_pval<0.05
tmp_df <- results_cox_pfs_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_compartment_pfs_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=Marker),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 10 rows containing missing values (`geom_point()`).

Summary results ‘Compartment’

### Prepare tables PFS
tmp_list_os <- do.call(rbind,tmp_list_os) ; tmp_os_cox_list <- do.call(rbind,tmp_os_cox_list) 
rownames(tmp_os_cox_list) <- NULL
tmp_list_os$pval <- as.numeric(gsub("p = ","",tmp_list_os$pval.txt))
tmp_list_os$pval.txt <- NULL
###
colnames(tmp_os_cox_list) <- paste('cox',colnames(tmp_os_cox_list),sep='_')
colnames(tmp_os_cox_list)[5] <- 'Marker'
colnames(tmp_list_os)[4] <- 'Marker'
### pfs
results_cox_os_fit_df <- merge(tmp_os_cox_list,tmp_list_os,by='Marker')
### pval match
results_cox_os_fit_df$pval_match <- NA
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$cox_pval<0.05] <- 'Cox'
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$pval<0.05] <- 'LogR'
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$pval<0.05 & results_cox_os_fit_df$cox_pval<0.05] <- 'Cox+LogR'
results_cox_os_fit_df$sig_cox <- results_cox_os_fit_df$cox_pval<0.05

OS

tmp_df <- results_cox_os_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='OS\nCox HR',y='OS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='OS\nCox HR',y='OS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing missing values (`geom_text_repel()`).

PFS

tmp_df <- results_cox_pfs_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_compartment_pfs_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 10 rows containing missing values (`geom_point()`).

### store
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_compartment_uni.RDS',results_cox_os_fit_df)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_compartment_uni.RDS',results_cox_pfs_fit_df)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_os_compartment_uni.csv',results_cox_os_fit_df)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_pfs_compartment_uni.csv',results_cox_pfs_fit_df)
### list to store all outcomes
results_cox_surv_list <- list(os_compartment_uni=results_cox_os_fit_df,
                              pfs_compartment_uni=results_cox_pfs_fit_df)

Stats for all populations ‘Compartment’ (Multivariate)

comp_fulldf <- cbind(meta_baseline_df,cell_compartment)
### Make data storage objects
tmp_os_cox_list <- list() ; tmp_pfs_cox_list <- list()
###
count=1 
###
for(i in 1:length(Compartment)){
    tmp_df <- comp_fulldf
    tmp_df$y <- tmp_df[,paste(Compartment[i])]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25))] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75))] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25)) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5))] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75)) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5))] <- 'Q3'
    ### PFS COX 
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label, 
                     model='Univariate')
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS
    x <- coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label, 
                     model='Univariate')  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch')  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex + d_dx_amm_iss_stage')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex + d_dx_amm_iss_stage')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + site + batch + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch + d_pt_sex + d_dx_amm_iss_stage')
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + site + batch + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch + d_pt_sex + d_dx_amm_iss_stage')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
}
tmp_os_cox_list <- do.call(rbind,tmp_os_cox_list) ; tmp_pfs_cox_list <- do.call(rbind,tmp_pfs_cox_list) 
rownames(tmp_os_cox_list) <- NULL ; rownames(tmp_pfs_cox_list) <- NULL
tmp_df <- tmp_os_cox_list
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
###
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2) 
#pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_davies_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(x='OS\nCox HR',y='OS\n-Log10(pval') + theme_classic()
## Warning: Removed 120 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#dev.off()
tmp_df <- tmp_pfs_cox_list
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
###
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2) 
#pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_davies_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(x='PFS\nCox HR',y='PFS\n-Log10(pval') + theme_classic()
## Warning: Removed 120 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#dev.off()
### store
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_compartment_multi.RDS',tmp_os_cox_list)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_compartment_multi.RDS',tmp_pfs_cox_list)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_os_compartment_multi.csv',tmp_os_cox_list)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_pfs_compartment_multi.csv',tmp_pfs_cox_list)
### list to store all outcomes
results_cox_surv_list$os_compartment_multi <- tmp_os_cox_list
results_cox_surv_list$pfs_compartment_multi <- tmp_pfs_cox_list

Stats for all populations ‘Subclusters all’ (Univariate)

subcluster_all_fulldf <- cbind(meta_baseline_df,cell_props_all)
###
my_vars <- colnames(cell_props_all)
### Make data storage objects
tmp_list_os <- list() ; tmp_os_cox_list <- list()
tmp_list_pfs <- list() ; tmp_pfs_cox_list <- list()
###
count=1 
###
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_sub_clusters_all_summary.pdf',width = 6,height = 6)
for(i in 1:length(my_vars)){
    tmp_df <- subcluster_all_fulldf
    tmp_df$y <- tmp_df[,paste(my_vars[i])]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25))] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75))] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25)) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5))] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75)) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5))] <- 'Q3'
    ### PFS Plot
    print(
      ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df), 
                  type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "1",pval.method = TRUE,
                  xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('deeppink1','deeppink4','skyblue4','skyblue1')) +
        labs(title = my_vars[i])
    )
    ### PFS SURV
    tmp_df_pfs <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
    tmp_df_pfs$marker <- paste(my_vars[i])
    tmp_list_pfs[[count]] <- tmp_df_pfs
    ### PFS COX 
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(my_vars[i]) , label=x$table_body$label)  
    tmp_pfs_cox_list[[count]] <- x
    ### OS Plot
    print(
      ggsurvplot( fit = survfit(Surv(ttcos, censos==1) ~ x, data = tmp_df), 
                  type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "1",pval.method = TRUE,
                  xlab = "Days", ylab = "OS Prob.",pval = TRUE,risk.table = TRUE,palette = c('deeppink1','deeppink4','skyblue4','skyblue1')) +
        labs(title = my_vars[i])
    )
    ### SURV OS
    tmp_df_os <- surv_pvalue( survfit(Surv(ttcos, censos==1) ~ x, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
    tmp_df_os$marker <- paste(my_vars[i])
    tmp_list_os[[count]] <- tmp_df_os
    ### COX OS
    x <- coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(my_vars[i]) , label=x$table_body$label)  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
}
dev.off()
## quartz_off_screen 
##                 2

Summary results ‘Subclusters all’

### Prepare tables os
tmp_list_os <- do.call(rbind,tmp_list_os) ; tmp_os_cox_list <- do.call(rbind,tmp_os_cox_list) 
rownames(tmp_os_cox_list) <- NULL
tmp_list_os$pval <- as.numeric(gsub("p = ","",tmp_list_os$pval.txt))
tmp_list_os$pval.txt <- NULL
###
colnames(tmp_os_cox_list) <- paste('cox',colnames(tmp_os_cox_list),sep='_')
colnames(tmp_os_cox_list)[5] <- 'Marker'
colnames(tmp_list_os)[4] <- 'Marker'
### pfs
results_cox_os_fit_df <- merge(tmp_os_cox_list,tmp_list_os,by='Marker')
### pval match
results_cox_os_fit_df$pval_match <- NA
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$cox_pval<0.05] <- 'Cox'
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$pval<0.05] <- 'LogR'
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$pval<0.05 & results_cox_os_fit_df$cox_pval<0.05] <- 'Cox+LogR'
results_cox_os_fit_df$sig_cox <- results_cox_os_fit_df$cox_pval<0.05
### Prepare tables os
tmp_list_pfs <- do.call(rbind,tmp_list_pfs) ; tmp_pfs_cox_list <- do.call(rbind,tmp_pfs_cox_list) 
rownames(tmp_pfs_cox_list) <- NULL
tmp_list_pfs$pval <- as.numeric(gsub("p = ","",tmp_list_pfs$pval.txt))
tmp_list_pfs$pval.txt <- NULL
###
colnames(tmp_pfs_cox_list) <- paste('cox',colnames(tmp_pfs_cox_list),sep='_')
colnames(tmp_pfs_cox_list)[5] <- 'Marker'
colnames(tmp_list_pfs)[4] <- 'Marker'
### pfs
results_cox_pfs_fit_df <- merge(tmp_pfs_cox_list,tmp_list_pfs,by='Marker')
### pval match
results_cox_pfs_fit_df$pval_match <- NA
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$cox_pval<0.05] <- 'Cox'
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$pval<0.05] <- 'LogR'
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$pval<0.05 & results_cox_pfs_fit_df$cox_pval<0.05] <- 'Cox+LogR'
results_cox_pfs_fit_df$sig_cox <- results_cox_pfs_fit_df$cox_pval<0.05

OS

tmp_df <- results_cox_os_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_subcluster_all_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='OS\nCox HR',y='OS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='OS\nCox HR',y='OS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 181 rows containing missing values (`geom_point()`).
## Warning: Removed 31 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

PFS

tmp_df <- results_cox_pfs_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_subcluster_all_pfs_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 181 rows containing missing values (`geom_point()`).
## Warning: Removed 29 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### store
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_all_uni.RDS',results_cox_os_fit_df)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_all_uni.RDS',results_cox_pfs_fit_df)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_os_subcluster_all_uni.csv',results_cox_os_fit_df)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_pfs_subcluster_all_uni.csv',results_cox_pfs_fit_df)
### list to store all outcomes
results_cox_surv_list$os_subcluster_all_unii <- results_cox_os_fit_df
results_cox_surv_list$pfs_subcluster_all_unii <- results_cox_pfs_fit_df

Stats for all populations ‘Subclusters all’ (Multivariate)

# subcluster_all_fulldf
### Make data storage objects
tmp_os_cox_list <- list() ; tmp_pfs_cox_list <- list()
###
Compartment <- colnames(cell_props_all)
###
count=1 
###
for(i in 1:length(Compartment)){
    tmp_df <- subcluster_all_fulldf
    tmp_df$y <- tmp_df[,paste(Compartment[i])]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25))] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75))] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25)) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5))] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75)) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5))] <- 'Q3'
    ### PFS COX 
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label, 
                     model='Univariate')
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS
    x <- coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label, 
                     model='Univariate')  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch')  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex + d_dx_amm_iss_stage')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex + d_dx_amm_iss_stage')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + site + batch + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch + d_pt_sex + d_dx_amm_iss_stage')
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + site + batch + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch + d_pt_sex + d_dx_amm_iss_stage')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
}
tmp_os_cox_list <- do.call(rbind,tmp_os_cox_list) ; tmp_pfs_cox_list <- do.call(rbind,tmp_pfs_cox_list) 
rownames(tmp_os_cox_list) <- NULL ; rownames(tmp_pfs_cox_list) <- NULL
tmp_df <- tmp_os_cox_list
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
###
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2) 
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
#pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_davies_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(x='OS\nCox HR',y='OS\n-Log10(pval') + theme_classic()
## Warning: Removed 2136 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 332 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#dev.off()
tmp_df <- tmp_pfs_cox_list
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
###
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2) 
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
#pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_davies_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(x='PFS\nCox HR',y='PFS\n-Log10(pval') + theme_classic()
## Warning: Removed 2136 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 482 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#dev.off()
### store
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_all_multi.RDS',tmp_os_cox_list)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_all_multi.RDS',tmp_pfs_cox_list)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_os_subcluster_all_multi.csv',tmp_os_cox_list)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_pfs_subcluster_all_multi.csv',tmp_pfs_cox_list)
### list to store all outcomes
results_cox_surv_list$os_subcluster_all_multi <- tmp_os_cox_list
results_cox_surv_list$pfs_subcluster_all_multi <- tmp_pfs_cox_list

Stats for all populations ‘Subclusters per Compartment’ (Univariate)

subcluster_comp_fulldf <- cbind(meta_baseline_df,cell_props_comp)
###
my_vars <- colnames(cell_props_comp)
### Make data storage objects
tmp_list_os <- list() ; tmp_os_cox_list <- list()
tmp_list_pfs <- list() ; tmp_pfs_cox_list <- list()
###
count=1 
###
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_sub_clusters_compartment_summary.pdf',width = 6,height = 6)
for(i in 1:length(my_vars)){
    tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,paste(my_vars[i])]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
    ### PFS Plot
    print(
      ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df), 
                  type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "1",pval.method = TRUE,
                  xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('deeppink1','deeppink4','skyblue4','skyblue1')) +
        labs(title = my_vars[i])
    )
    ### PFS SURV
    tmp_df_pfs <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
    tmp_df_pfs$marker <- paste(my_vars[i])
    tmp_list_pfs[[count]] <- tmp_df_pfs
    ### PFS COX 
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(my_vars[i]) , label=x$table_body$label)  
    tmp_pfs_cox_list[[count]] <- x
    ### OS Plot
    print(
      ggsurvplot( fit = survfit(Surv(ttcos, censos==1) ~ x, data = tmp_df), 
                  type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "1",pval.method = TRUE,
                  xlab = "Days", ylab = "OS Prob.",pval = TRUE,risk.table = TRUE,palette = c('deeppink1','deeppink4','skyblue4','skyblue1')) +
        labs(title = my_vars[i])
    )
    ### SURV OS
    tmp_df_os <- surv_pvalue( survfit(Surv(ttcos, censos==1) ~ x, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
    tmp_df_os$marker <- paste(my_vars[i])
    tmp_list_os[[count]] <- tmp_df_os
    ### COX OS
    x <- coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(my_vars[i]) , label=x$table_body$label)  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
}
dev.off()
## quartz_off_screen 
##                 2

Summary results ‘Subclusters Compartment’

### Prepare tables os
tmp_list_os <- do.call(rbind,tmp_list_os) ; tmp_os_cox_list <- do.call(rbind,tmp_os_cox_list) 
rownames(tmp_os_cox_list) <- NULL
tmp_list_os$pval <- as.numeric(gsub("p = ","",tmp_list_os$pval.txt))
tmp_list_os$pval.txt <- NULL
###
colnames(tmp_os_cox_list) <- paste('cox',colnames(tmp_os_cox_list),sep='_')
colnames(tmp_os_cox_list)[5] <- 'Marker'
colnames(tmp_list_os)[4] <- 'Marker'
### pfs
results_cox_os_fit_df <- merge(tmp_os_cox_list,tmp_list_os,by='Marker')
### pval match
results_cox_os_fit_df$pval_match <- NA
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$cox_pval<0.05] <- 'Cox'
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$pval<0.05] <- 'LogR'
results_cox_os_fit_df$pval_match[results_cox_os_fit_df$pval<0.05 & results_cox_os_fit_df$cox_pval<0.05] <- 'Cox+LogR'
results_cox_os_fit_df$sig_cox <- results_cox_os_fit_df$cox_pval<0.05
### Prepare tables pfs
tmp_list_pfs <- do.call(rbind,tmp_list_pfs) ; tmp_pfs_cox_list <- do.call(rbind,tmp_pfs_cox_list) 
rownames(tmp_pfs_cox_list) <- NULL
tmp_list_pfs$pval <- as.numeric(gsub("p = ","",tmp_list_pfs$pval.txt))
tmp_list_pfs$pval.txt <- NULL
###
colnames(tmp_pfs_cox_list) <- paste('cox',colnames(tmp_pfs_cox_list),sep='_')
colnames(tmp_pfs_cox_list)[5] <- 'Marker'
colnames(tmp_list_pfs)[4] <- 'Marker'
### pfs
results_cox_pfs_fit_df <- merge(tmp_pfs_cox_list,tmp_list_pfs,by='Marker')
### pval match
results_cox_pfs_fit_df$pval_match <- NA
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$cox_pval<0.05] <- 'Cox'
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$pval<0.05] <- 'LogR'
results_cox_pfs_fit_df$pval_match[results_cox_pfs_fit_df$pval<0.05 & results_cox_pfs_fit_df$cox_pval<0.05] <- 'Cox+LogR'
results_cox_pfs_fit_df$sig_cox <- results_cox_pfs_fit_df$cox_pval<0.05

OS

tmp_df <- results_cox_os_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
tmp_df$cox_HR[tmp_df$cox_HR<0.1] <- 0.1
###
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_subcluster_comp_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='OS\nCox HR',y='OS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='OS\nCox HR',y='OS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 179 rows containing missing values (`geom_point()`).
## Warning: Removed 30 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

PFS

tmp_df <- results_cox_pfs_fit_df
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$cox_label,sep='.')
tmp_df$cox_HR[tmp_df$cox_HR<0.1] <- 0.1
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_unsupervised_surv_coxph_subcluster_comp_pfs_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=tmp_df)+aes(x=cox_HR,y=-log10(pval),shape=(sig_cox)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[tmp_df$pval_match %in% c('Cox+LogR','LogR','Cox'),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(shape='(Cox)p<0.05',color='p<0.05',x='PFS\nCox HR',y='PFS\n-Log10(p-log-rank)') + theme_classic()
## Warning: Removed 179 rows containing missing values (`geom_point()`).
## Warning: Removed 33 rows containing missing values (`geom_text_repel()`).
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### store
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_comp_uni.RDS',results_cox_os_fit_df)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_comp_uni.RDS',results_cox_pfs_fit_df)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_os_subcluster_comp_uni.csv',results_cox_os_fit_df)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_pfs_subcluster_comp_uni.csv',results_cox_pfs_fit_df)
### list to store all outcomes
results_cox_surv_list$os_subcluster_comp_uni <- results_cox_os_fit_df
results_cox_surv_list$pfs_subcluster_comp_uni <- results_cox_pfs_fit_df

Stats for all populations ‘Subclusters Comp’ (Multivariate)

# subcluster_comp_fulldf
### Make data storage objects
tmp_os_cox_list <- list() ; tmp_pfs_cox_list <- list()
###
Compartment <- colnames(cell_props_comp)
###
count=1 
###
for(i in 1:length(Compartment)){
    tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,paste(Compartment[i])]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
    ### PFS COX 
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label, 
                     model='Univariate')
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS
    x <- coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label, 
                     model='Univariate')  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch')  
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex + d_dx_amm_iss_stage')  
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ d_pt_sex + d_dx_amm_iss_stage')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
    
    ### PFS COX sex
    x <- coxph(Surv(ttcpfs, censpfs==1) ~ x + site + batch + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch + d_pt_sex + d_dx_amm_iss_stage')
    tmp_pfs_cox_list[[count]] <- x
    ### COX OS sex
    x <- coxph(Surv(ttcos, censos==1) ~ x + site + batch + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
    x <- data.frame( pval = x$table_body$p.value, CI95_high = x$table_body$conf.high, CI95_low = x$table_body$conf.low,
                     HR = x$table_body$estimate , Marker = paste(Compartment[i]) , label=x$table_body$label,
                     model='+ site + batch + d_pt_sex + d_dx_amm_iss_stage')
    tmp_os_cox_list[[count]] <- x
    
    count=count+1
}
tmp_os_cox_list <- do.call(rbind,tmp_os_cox_list) ; tmp_pfs_cox_list <- do.call(rbind,tmp_pfs_cox_list) 
rownames(tmp_os_cox_list) <- NULL ; rownames(tmp_pfs_cox_list) <- NULL
tmp_df <- tmp_os_cox_list
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
###
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2) 
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
#pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_davies_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(x='OS\nCox HR',y='OS\n-Log10(pval') + theme_classic()
## Warning: Removed 2112 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 383 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#dev.off()
tmp_df <- tmp_pfs_cox_list
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
###
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2) 
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
#pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_davies_surv_coxph_compartment_os_summary.pdf',width = 5,height = 3)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() + 
  geom_hline(yintercept = 1.3,linetype=2,color='red') +
  geom_vline(xintercept = 1,linetype=2,color='black') +
  #scale_color_manual(values = c('deeppink1','darkorange')) + 
  scale_x_log10() +
  geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
  labs(x='PFS\nCox HR',y='PFS\n-Log10(pval') + theme_classic()
## Warning: Removed 2112 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 476 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

#dev.off()
### store
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_comp_multi.RDS',tmp_os_cox_list)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_comp_multi.RDS',tmp_pfs_cox_list)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_os_subcluster_comp_multi.csv',tmp_os_cox_list)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/tables/results_cox_surv_pfs_subcluster_comp_multi.csv',tmp_pfs_cox_list)
### list to store all outcomes
results_cox_surv_list$os_subcluster_comp_multi <- tmp_os_cox_list
results_cox_surv_list$pfs_subcluster_comp_multi <- tmp_pfs_cox_list
### clean space
rm(list=setdiff(ls(), c('results_cox_surv_list','cellprop_fulldf','cell_props_all','cell_props_comp','meta_baseline_df','subcluster_all_fulldf','subcluster_comp_fulldf') ))

Compare markers significant between stat experiments (OS all subclusters)

###
a<-results_cox_surv_list$os_subcluster_all_multi
b<-results_cox_surv_list$os_subcluster_all_unii

x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
uni=unique(b$Marker[which(b$pval<0.05)]))

subcluster_all_sig_markers <- Reduce(intersect,x)
subcluster_all_sig_markers
##  [1] "BEry.1"     "BEry.2"     "BEry.3"     "BEry.16"    "Myeloid.2" 
##  [6] "Myeloid.12" "NkT.1.1"    "NkT.1.4"    "NkT.2.0"    "NkT.2.2"   
## [11] "NkT.3.1"    "NkT.6"      "Plasma.7"   "Plasma.21"

Compare markers significant between stat experiments (OS per compartment)

a<-results_cox_surv_list$os_subcluster_comp_multi
b<-results_cox_surv_list$os_subcluster_comp_uni

x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
        uni=unique(b$Marker[which(b$pval<0.05)]))

subcluster_comp_sig_markers <- Reduce(intersect,x)
subcluster_comp_sig_markers
##  [1] "NkT.1.4"    "NkT.3.1"    "NkT.6"      "NkT.13"     "BEry.1"    
##  [6] "BEry.2"     "BEry.7"     "BEry.15.0"  "BEry.16"    "Ery.5"     
## [11] "Myeloid.12" "Plasma.4"   "Plasma.21"

Significant between both All and Compartment models

x<-list(all=subcluster_all_sig_markers,comp=subcluster_comp_sig_markers)
final_list_os <- Reduce(intersect,x)
final_list_os
## [1] "BEry.1"     "BEry.2"     "BEry.16"    "Myeloid.12" "NkT.1.4"   
## [6] "NkT.3.1"    "NkT.6"      "Plasma.21"

Compare markers significant between stat experiments (PFS all subclusters)

###
a<-results_cox_surv_list$pfs_subcluster_all_multi
b<-results_cox_surv_list$pfs_subcluster_all_unii

x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
uni=unique(b$Marker[which(b$pval<0.05)]))

subcluster_all_sig_markers_pfs <- Reduce(intersect,x)
subcluster_all_sig_markers_pfs
##  [1] "BEry.1"     "BEry.3"     "BEry.9"     "BEry.15.1"  "Myeloid.2" 
##  [6] "Myeloid.5"  "Myeloid.12" "NkT.1.1"    "NkT.2.0"    "NkT.6"     
## [11] "Plasma.4"   "Plasma.15"  "Plasma.21"

Compare markers significant between stat experiments (PFS per compartment)

a<-results_cox_surv_list$pfs_subcluster_comp_multi
b<-results_cox_surv_list$pfs_subcluster_comp_uni

x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
        uni=unique(b$Marker[which(b$pval<0.05)]))

subcluster_comp_sig_markers_pfs <- Reduce(intersect,x)
subcluster_comp_sig_markers_pfs
##  [1] "NkT.0"      "NkT.2.3"    "NkT.6"      "BEry.1"     "BEry.3"    
##  [6] "BEry.9"     "BEry.15.0"  "BEry.15.1"  "Myeloid.5"  "Myeloid.12"
## [11] "Plasma.4"   "Plasma.15"  "Plasma.21"
x<-list(all=subcluster_all_sig_markers_pfs,comp=subcluster_comp_sig_markers_pfs)
final_list_pfs <- Reduce(intersect,x)
final_list_pfs
##  [1] "BEry.1"     "BEry.3"     "BEry.9"     "BEry.15.1"  "Myeloid.5" 
##  [6] "Myeloid.12" "NkT.6"      "Plasma.4"   "Plasma.15"  "Plasma.21"

Subclusters representation in data

file_717_per_cell_md <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/per_cell_md.rds')
x <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(x) <- 'matrix' ; x[x>0]<-1 ; x <- rowSums(x) ; x <- sort(x)
sample_size_df <- data.frame(subcluster=names(x),samples=as.numeric(x))
sample_size_df$more_than_median <- sample_size_df$samples > median(sample_size_df$samples)
final_list_pfs[which(final_list_pfs %in% sample_size_df$subcluster[which(sample_size_df$more_than_median %in% TRUE )])]
## [1] "BEry.1"    "BEry.3"    "Myeloid.5" "NkT.6"
final_list_os[which(final_list_os %in% sample_size_df$subcluster[which(sample_size_df$more_than_median %in% TRUE )])]
## [1] "BEry.1"  "BEry.2"  "NkT.1.4" "NkT.3.1" "NkT.6"

Testing significant pops (OS)

NkT.1.4

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"NkT.1.4"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX OS
coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
x


    Q1 — —
    Q2 0.54 0.30, 0.98 0.042
    Q3 0.94 0.55, 1.60 0.8
    Q4 0.27 0.13, 0.56 <0.001
1 HR = Hazard Ratio, CI = Confidence Interval
### COX OS
coxph(Surv(ttcos, censos==1) ~ x, data = tmp_df)
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ x, data = tmp_df)
## 
##         coef exp(coef) se(coef)      z        p
## xQ2 -0.61207   0.54223  0.30042 -2.037 0.041609
## xQ3 -0.05913   0.94258  0.27024 -0.219 0.826794
## xQ4 -1.30686   0.27067  0.36743 -3.557 0.000375
## 
## Likelihood ratio test=18.87  on 3 df, p=0.0002903
## n= 230, number of events= 83
### COX OS
coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex, data = tmp_df)
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ x + d_pt_sex, data = tmp_df)
## 
##                  coef exp(coef) se(coef)      z        p
## xQ2          -0.61771   0.53918  0.30043 -2.056 0.039775
## xQ3          -0.04681   0.95427  0.27044 -0.173 0.862584
## xQ4          -1.30684   0.27067  0.36740 -3.557 0.000375
## d_pt_sexmale  0.26713   1.30621  0.23466  1.138 0.254973
## 
## Likelihood ratio test=20.21  on 4 df, p=0.0004545
## n= 230, number of events= 83
### COX OS
coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage, data = tmp_df)
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ x + d_pt_sex + d_dx_amm_iss_stage, 
##     data = tmp_df)
## 
##                        coef exp(coef) se(coef)      z        p
## xQ2                -0.48418   0.61620  0.30544 -1.585  0.11292
## xQ3                -0.05607   0.94548  0.27791 -0.202  0.84012
## xQ4                -1.19720   0.30204  0.37168 -3.221  0.00128
## d_pt_sexmale        0.26146   1.29882  0.23984  1.090  0.27566
## d_dx_amm_iss_stage  0.76699   2.15329  0.15781  4.860 1.17e-06
## 
## Likelihood ratio test=42.56  on 5 df, p=4.528e-08
## n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
### COX OS
coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site, data = tmp_df)
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ x + d_pt_sex + d_dx_amm_iss_stage + 
##     site, data = tmp_df)
## 
##                       coef exp(coef) se(coef)      z        p
## xQ2                -0.4923    0.6112   0.3054 -1.612  0.10689
## xQ3                -0.0426    0.9583   0.2782 -0.153  0.87830
## xQ4                -1.1535    0.3155   0.3752 -3.074  0.00211
## d_pt_sexmale        0.2314    1.2603   0.2444  0.947  0.34385
## d_dx_amm_iss_stage  0.8149    2.2588   0.1629  5.001 5.69e-07
## siteMAYO           -0.1437    0.8661   0.3725 -0.386  0.69953
## siteMSSM           -0.3772    0.6858   0.3731 -1.011  0.31201
## siteWUSTL           0.1396    1.1498   0.3315  0.421  0.67374
## 
## Likelihood ratio test=45.67  on 8 df, p=2.742e-07
## n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
### COX OS
coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df)
## Call:
## coxph(formula = Surv(ttcos, censos == 1) ~ x + d_pt_sex + d_dx_amm_iss_stage + 
##     site + batch, data = tmp_df)
## 
##                        coef exp(coef) se(coef)      z        p
## xQ2                -0.51243   0.59904  0.30784 -1.665  0.09599
## xQ3                -0.03495   0.96566  0.28510 -0.123  0.90244
## xQ4                -1.08243   0.33877  0.37752 -2.867  0.00414
## d_pt_sexmale        0.25413   1.28933  0.24746  1.027  0.30445
## d_dx_amm_iss_stage  0.81503   2.25924  0.16573  4.918 8.75e-07
## siteMAYO           -0.14305   0.86671  0.37301 -0.383  0.70135
## siteMSSM           -0.33856   0.71279  0.37859 -0.894  0.37118
## siteWUSTL           0.13142   1.14045  0.36239  0.363  0.71687
## batchB2            -0.48915   0.61314  0.31421 -1.557  0.11953
## batchB3            -0.23264   0.79243  0.32068 -0.725  0.46816
## batchB4            -0.06535   0.93674  0.39452 -0.166  0.86844
## 
## Likelihood ratio test=48.43  on 11 df, p=1.197e-06
## n= 220, number of events= 80 
##    (10 observations deleted due to missingness)
### COX OS
coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
x


    Q1 — —
    Q2 0.60 0.33, 1.10 0.10
    Q3 0.97 0.55, 1.69 >0.9
    Q4 0.34 0.16, 0.71 0.004
d_pt_sex


    female — —
    male 1.29 0.79, 2.09 0.3
d_dx_amm_iss_stage 2.26 1.63, 3.13 <0.001
site


    EMORY — —
    MAYO 0.87 0.42, 1.80 0.7
    MSSM 0.71 0.34, 1.50 0.4
    WUSTL 1.14 0.56, 2.32 0.7
batch


    B1 — —
    B2 0.61 0.33, 1.14 0.12
    B3 0.79 0.42, 1.49 0.5
    B4 0.94 0.43, 2.03 0.9
1 HR = Hazard Ratio, CI = Confidence Interval
### COX OS
forest_model( coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

BEry.1

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"BEry.1"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX OS
forest_model( coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

BEry.2

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"BEry.2"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX OS
forest_model( coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

NkT.6

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"NkT.6"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX OS
forest_model( coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

NkT.3.1

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"NkT.3.1"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX OS
forest_model( coxph(Surv(ttcos, censos==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

Testing significant pops (PFS)

Myeloid.5

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"Myeloid.5"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX pfs
coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
x


    Q1 — —
    Q2 1.04 0.62, 1.75 0.9
    Q3 1.05 0.63, 1.73 0.9
    Q4 1.84 1.06, 3.17 0.029
d_pt_sex


    female — —
    male 1.03 0.71, 1.49 0.9
d_dx_amm_iss_stage 1.66 1.31, 2.11 <0.001
site


    EMORY — —
    MAYO 1.02 0.54, 1.92 >0.9
    MSSM 0.88 0.51, 1.50 0.6
    WUSTL 1.24 0.71, 2.18 0.4
batch


    B1 — —
    B2 0.55 0.34, 0.89 0.015
    B3 1.05 0.67, 1.64 0.8
    B4 0.72 0.37, 1.39 0.3
1 HR = Hazard Ratio, CI = Confidence Interval
### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

Myeloid.12

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"Myeloid.12"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX pfs
coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
x


    Q3 — —
    Q4 1.42 0.95, 2.11 0.086
d_pt_sex


    female — —
    male 1.01 0.70, 1.47 >0.9
d_dx_amm_iss_stage 1.65 1.30, 2.09 <0.001
site


    EMORY — —
    MAYO 0.72 0.41, 1.26 0.2
    MSSM 0.82 0.48, 1.41 0.5
    WUSTL 0.90 0.51, 1.57 0.7
batch


    B1 — —
    B2 0.52 0.33, 0.83 0.006
    B3 0.97 0.62, 1.51 0.9
    B4 0.70 0.37, 1.35 0.3
1 HR = Hazard Ratio, CI = Confidence Interval
### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

BEry.3

tmp_df <- subcluster_comp_fulldf
    tmp_df$y <- tmp_df[,"BEry.3"]
    tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
### COX pfs
coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) %>% gtsummary::tbl_regression(exp = TRUE)
Characteristic HR1 95% CI1 p-value
x


    Q1 — —
    Q2 0.89 0.54, 1.45 0.6
    Q3 0.53 0.32, 0.89 0.016
    Q4 0.99 0.63, 1.57 >0.9
d_pt_sex


    female — —
    male 1.12 0.77, 1.62 0.6
d_dx_amm_iss_stage 1.61 1.26, 2.04 <0.001
site


    EMORY — —
    MAYO 0.81 0.46, 1.43 0.5
    MSSM 0.91 0.53, 1.55 0.7
    WUSTL 1.10 0.64, 1.89 0.7
batch


    B1 — —
    B2 0.51 0.32, 0.81 0.005
    B3 0.92 0.59, 1.45 0.7
    B4 0.56 0.29, 1.09 0.087
1 HR = Hazard Ratio, CI = Confidence Interval
### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

Myeloid.12 for example nomogram

tmp_df <- subcluster_comp_fulldf
    #tmp_df$y <- tmp_df[,"Myeloid.12"]
    #tmp_df$x[tmp_df$y < quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE)] <- 'Q1'
    #tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    #tmp_df$x[tmp_df$y >= quantile(tmp_df$y, probs=c(0.25),na.rm = TRUE) & tmp_df$y <= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q2'
    #tmp_df$x[tmp_df$y <= quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE) & tmp_df$y >= quantile(tmp_df$y, probs=c(0.5),na.rm = TRUE)] <- 'Q3'
    tmp_df$y <- tmp_df[,"Myeloid.12"]
    tmp_df$x <- 'Low'
    tmp_df$x[tmp_df$y > median(tmp_df$y)] <- 'High'
    #tmp_df$x[tmp_df$y > mean(tmp_df$y)] <- 'High'
    #tmp_df$x[tmp_df$y > quantile(tmp_df$y, probs=c(0.75),na.rm = TRUE)] <- 'Q4'
    #tmp_df$x[tmp_df$y > 0.01] <- 'Extreme'
### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + davies_based_risk + d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + davies_based_risk + d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age, data = tmp_df) )

library(rms)

tmp_df$Myeloid.12.Score <- factor(tmp_df$x,levels = c('Low','High'))
tmp_df$Cytogenetic.Risk <- as.character(tmp_df$davies_based_risk)
tmp_df$Cytogenetic.Risk[tmp_df$Cytogenetic.Risk %in% 'standard_risk'] <- 'Standard'
tmp_df$Cytogenetic.Risk[tmp_df$Cytogenetic.Risk %in% 'high_risk'] <- 'High'
tmp_df$Cytogenetic.Risk <- factor(tmp_df$Cytogenetic.Risk,levels = c('Standard','High'))
tmp_df$Sex <- factor(tmp_df$d_pt_sex,levels = c('female','male'))
tmp_df$ISS.Stage <- tmp_df$d_dx_amm_iss_stage
tmp_df$Age <- tmp_df$d_dx_amm_age

tmp_df_V2 <- tmp_df[,which(colnames(tmp_df) %in% c('censpfs','Myeloid.12.Score','Cytogenetic.Risk','Sex','ISS.Stage','Age'))]

ddist <- datadist(tmp_df_V2)
options(datadist='ddist')

mod.bi <- lrm(censpfs ~ Myeloid.12.Score + Cytogenetic.Risk + ISS.Stage + Age , tmp_df_V2, x=TRUE)

nom.bi <- nomogram(mod.bi,#lp.at=seq(-3,4,by=0.5),
                   fun = plogis, #fun=function(x)1/(1+exp(-x)),#fun.at=c(.001,.01,.05,seq(.1,.9,by=.1),.95,.99,.999),
                   funlabel="Risk of Progression" #conf.int=c(0.1,0.5),#abbrev=FALSE,#minlength=1,lp=F
                   )
pdf(file="/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/myeloid12.progressio.risk.nonogram.pdf",width = 7.5,height = 5)
plot(nom.bi,
     col.grid = gray(c(0.8, 0.95)))
dev.off()
## quartz_off_screen 
##                 2
#Predict(mod.bi)
tmp_df$pred <- predict(object = mod.bi, tmp_df_V2[,-1],se.fit = TRUE,type="fitted.ind")
tmp_df$pred[tmp_df$pred > 0.6] <- 'Integrated High Risk'
tmp_df$pred[!tmp_df$pred %in% 'Integrated High Risk'] <- 'Integrated Low Risk'
### PFS COX 
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ pred + d_pt_sex + site + batch, data = tmp_df) )

ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df), 
            type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
            xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('darkgreen','purple'))